####################################################################################
## xu xian feng
## 2023/04/15
## STAD analysis
####################################################################################

# Load the project configuration. The variables used throughout this script
# (work_dir, scripts_path, Rscript, config_path, ...) are not defined in this
# file, so they are presumably provided by config.sh.
source ~/20220915_gastric_multiple/dna_combinePublic/config/config.sh
export config_file=~/20220915_gastric_multiple/dna_combinePublic/config/config.sh

# Root directory for the article figures; `mkdir -p` also creates the parent
# finalPlot/ directory. The :? guard aborts instead of creating "/finalPlot" at
# the filesystem root when work_dir is empty or unset.
mkdir -p "${work_dir:?work_dir is not set - check config.sh}/finalPlot/Plot_Article/"

####################################################################################
## Part 1
## Study design + baseline characteristics + molecular features
####################################################################################
# Output directories used by the Part 1 steps and by the article symlinks.
mkdir -p \
  "${work_dir}/finalPlot/study_design/" \
  "${work_dir}/finalPlot/molecular_type/" \
  "${work_dir}/finalPlot/Plot_Article/Part1"

##########################################
## Schematic ("mode") figures
## 1. Mutation burden by molecular subtype, IGC vs DGC: left plot of the
##    mutation-burden panel on the right-hand side of Figure 1.
## Expected output: MutationBurden.compare.IGC_DGC.MolecularType.All.mode.pdf
# ${Rscript} stays unquoted in case config.sh defines it with extra arguments.
${Rscript} "${scripts_path}/plot/mutBurden.MolecularType.mode.R" \
  --input_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv" \
  --out_path "${work_dir}/finalPlot/study_design"

## 2. TMB of intestinal metaplasia (IM) versus alcohol drinking: right plot of
##    the mutation-burden panel on the right-hand side of Figure 1 (effect of
##    drinking on the mutation burden of the different IM groups).
## Expected output: mutBurden.Alcohol.cds.IM.MolType.mode.pdf
${Rscript} "${scripts_path}/mutBurden/mutBurden_plot.Baseline.mode.R" \
  --input_file "${baseTable_path}/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
  --images_path "${work_dir}/finalPlot/study_design"

## 3. Mutation waterfall plot: the driver-gene landscape on the right-hand side
##    of Figure 1.
## Expected output: Mut_WaterFall.new.mode.pdf
# NOTE(review): igc_smg.addEvolution.list and dgc_smg.addEvolution.list are
# created by the `cp` commands in the Part 2 section further down this file, so
# on a fresh top-to-bottom run they may not exist yet at this point - confirm.
${Rscript} "${scripts_path}/plot/waterfull_smg.new_v2.mode.R" \
  --maf_path "${maf_public_path}" \
  --images_path "${work_dir}/finalPlot/study_design/" \
  --info_file "${config_path}/tumor_normal.class.list" \
  --info_public_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv" \
  --class_order_file "${config_path}/Class_order.list" \
  --im_list "${mutsig_check_path}/im_smg.list" \
  --igc_list "${mutsig_check_path}/igc_smg.addEvolution.list" \
  --dgc_list "${mutsig_check_path}/dgc_smg.addEvolution.list"

## 4. Evolution pattern: the evolution landscape on the right-hand side of
##    Figure 1.
## Expected outputs:
##   GeneTrunk.evolutionChoose.ratio.IGC.mode.pdf
##   GeneTrunk.evolutionChoose.ratio.DGC.mode.pdf
clone_t=0.6
choose_rate=0
class_type_list=("IGC" "DGC")
# Run once per cancer type; the array is expanded quoted so each element stays
# a single word.
for class_type in "${class_type_list[@]}"; do
  ## Shared / private status of the driver genes
  ${Rscript} "${scripts_path}/plot/JudgeGeneDriverSharePrivate.mode.R" \
    --muti_cancer "${maf_path}/All_GGA.cancer.maf" \
    --muti_pre "${maf_path}/All_GGA.precancer.maf" \
    --ccf_file "${MutationTime_path}/result/All_CCF_mutTime.tsv" \
    --gene_list "${mutsig_check_path}/smg.list" \
    --sample_info "${config_path}/tumor_normal.class.list" \
    --type "${class_type}" \
    --clone_t "${clone_t}" \
    --choose_rate "${choose_rate}" \
    --out_path "${work_dir}/finalPlot/study_design"
done


##########################################
## Compare baseline characteristics of samples from the different sources;
## produces Table 1 (STAD-CombinePublic.BaseLineCompare.csv).
# First drop every row containing the literal text "IGC + DGC" (mixed samples).
grep -v "IGC + DGC" "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv" \
  > "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.rmMIX.tsv"

${Rscript_mutationTime} "${scripts_path}/finalPlot/CompareBaseLine.CombinePublic.R" \
  --input_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.rmMIX.tsv" \
  --out_dir "${work_dir}/finalPlot/study_design/"

## Compare the molecular subtypes of IGC and DGC across the different sources
## (Supplementary Fig. 2).
${Rscript} "${scripts_path}/plot/ratio_molecular_subtype.R" \
  --info_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.rmMIX.tsv" \
  --out_path "${work_dir}/finalPlot/molecular_type"

##########################################
## Drop the samples labelled "IM + IGC + DGC" from the sample list.
grep -v "IM + IGC + DGC" "${config_path}/tumor_normal.class.MSS_MSI.list" \
  > "${config_path}/tumor_normal.class.MSS_MSI.rmMIX.list"

## Drop the same samples from the molecular-subtype annotation table.
grep -v "IM + IGC + DGC" "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
  > "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.rmMIX.tsv"

## Remove the MIX samples from the RNA expression matrix.
# The unfiltered sample list is passed in; the filtering itself happens inside
# mrna_delmix.R.
${Rscript} "${scripts_path}/plot/mrna_delmix.R" \
  --info_file "${config_path}/tumor_normal.class.MSS_MSI.list" \
  --mrna_file "${work_dir}/mRNA/NJSCC.DNAUse.53_157.tsv" \
  --out_file "${work_dir}/mRNA/NJSCC.DNAUse.48_172.tsv"

## Remove the MIX samples from the CCF table.
# Column 3 of every "IM + IGC + DGC" row is joined with '|' into one extended
# regular expression (id1|id2|...).
mix_sample=$(grep "IM + IGC + DGC" "${config_path}/tumor_normal.class.MSS_MSI.list" \
  | awk -F'\t' '{print $3}' | tr '\n' '|' | sed 's/|$//')
if [[ -n "${mix_sample}" ]]; then
  # Drop every line that contains one of the IDs as a whole word.
  grep -v -E -w -- "${mix_sample}" "${MutationTime_path}/result/All_CCF_mutTime.addShare.tsv" \
    > "${MutationTime_path}/result/All_CCF_mutTime.addShare.rmMIX.tsv"
else
  # No MIX sample found: an empty pattern would make grep fail and leave an
  # empty output file, so pass the table through unchanged instead.
  cat "${MutationTime_path}/result/All_CCF_mutTime.addShare.tsv" \
    > "${MutationTime_path}/result/All_CCF_mutTime.addShare.rmMIX.tsv"
fi

# Print the number of data rows (header line skipped) of the filtered table and
# of the MSI table.
sed '1d' "${MutationTime_path}/result/All_CCF_mutTime.addShare.rmMIX.tsv" | wc -l
sed '1d' "${MutationTime_path}/result/All_CCF_mutTime.MSI.tsv" | wc -l

## Count the IM, IGC and DGC samples.
## WES: number of lines in the MIX-free list containing each class tag.
IM_sample_num=$(grep -c IM- "${config_path}/tumor_normal.class.MSS_MSI.rmMIX.list")
IGC_sample_num=$(grep -c IGC- "${config_path}/tumor_normal.class.MSS_MSI.rmMIX.list")
DGC_sample_num=$(grep -c DGC- "${config_path}/tumor_normal.class.MSS_MSI.rmMIX.list")

echo "IM_sample_num,IGC_sample_num,DGC_sample_num"
echo "${IM_sample_num},${IGC_sample_num},${DGC_sample_num}"

## RNA
# Build an ERE alternation from the header of the RNA matrix: every column name
# except those containing "gene", truncated at the first '_', de-duplicated.
use_sample=$(head -1 "${mRNA_path}/NJSCC.DNAUse.48_172.tsv" | tr '\t' '\n' | grep -v gene \
  | awk -F'_' '{print $1}' | sort -u | tr '\n' '|' | sed 's/|$//')
if [[ -n "${use_sample}" ]]; then
  # Keep only list lines mentioning one of those IDs as a whole word, then
  # count per class tag.
  IM_sample_num=$(grep -E -w -- "${use_sample}" "${config_path}/tumor_normal.class.MSS_MSI.rmMIX.list" | grep -c IM-)
  IGC_sample_num=$(grep -E -w -- "${use_sample}" "${config_path}/tumor_normal.class.MSS_MSI.rmMIX.list" | grep -c IGC-)
  DGC_sample_num=$(grep -E -w -- "${use_sample}" "${config_path}/tumor_normal.class.MSS_MSI.rmMIX.list" | grep -c DGC-)
else
  # Without any ID there is nothing to match; report zero rather than calling
  # grep with an empty pattern.
  echo "warning: no sample IDs found in ${mRNA_path}/NJSCC.DNAUse.48_172.tsv" >&2
  IM_sample_num=0
  IGC_sample_num=0
  DGC_sample_num=0
fi
# Normal samples are counted directly from the matrix header.
Normal_sample_num=$(head -1 "${mRNA_path}/NJSCC.DNAUse.48_172.tsv" | tr '\t' '\n' | grep -c Normal)

echo "Normal_sample_num,IM_sample_num,IGC_sample_num,DGC_sample_num"
echo "${Normal_sample_num},${IM_sample_num},${IGC_sample_num},${DGC_sample_num}"


##########################################
## Chromosomal distribution of CNVs.
## CNV alterations of the two CNV subtypes of IGC and DGC in NJMU and TCGA.
# NOTE(review): the identical command is run again in the "CNV comparison"
# section further down this file; one of the two runs looks redundant.
${Rscript} "${scripts_path}/plot/cnvDistribution_plot.combine.R" \
  --sample_file "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
  --seg_file "${Titan_path}/Titan_all_seg.final.tsv" \
  --sample_public_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv" \
  --seg_public_file "${work_dir}/seg_public/TCGA_use.seg" \
  --class_order_file "${config_path}/Class_order.list" \
  --images_path "${work_dir}/finalPlot/molecular_type/cnv_burden"


##########################################
## Mutation burden comparison.
## IGC vs DGC mutation burden in MSS samples (Figure 2a):
##   MutationBurden.combine.IGC_DGC.MSS.pdf
##   MutationBurden.combine.IGC_DGC.MSS.tsv
## Comparison across the data sources (Supplementary Fig. 1):
##   MutationBurden.compare.IGC_DGC.MSS.pdf
${Rscript} "${scripts_path}/comparePublicData/CombinePublicData.R" \
  --njmu_file "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.tsv" \
  --tcga_file "${work_dir}/public_ref/TCGA/TCGA_STAD.TMB.tsv" \
  --tcga_clinic_file "${work_dir}/public_ref/TCGA/clinical_PANCAN_patient_with_followup.tsv" \
  --tcga_msi_file "${work_dir}/public_ref/TCGA/TCGA_STAD_msi.TMB.tsv" \
  --oncoSG_file "${work_dir}/public_ref/OncoSG/OncoSG_STAD.TMB.tsv" \
  --oncoSG_clinic_file "${work_dir}/public_ref/OncoSG/OncoSG_STAD.followup.tsv" \
  --oncoSG_msi_file "${work_dir}/public_ref/OncoSG/OncoSG_STAD_msi.TMB.tsv" \
  --tmucih_file "${work_dir}/public_ref/TMUCIH/TMUCIH_STAD.TMB.tsv" \
  --tmucih_msi_file "${work_dir}/public_ref/TMUCIH/TMUCIH_STAD_msi.TMB.tsv" \
  --utokyo_file "${work_dir}/public_ref/utokyo/utokyo_STAD.TMB.tsv" \
  --out_path "${work_dir}/finalPlot/molecular_type/mutBurden"


##########################################
## IGC vs DGC mutation burden within each molecular subtype (Figure 2b):
##   MutationBurden.compare.IGC_DGC.MolecularType.All.pdf
## Comparison across the data sources (Supplementary Fig. 2):
##   MutationBurden.compare.IGC_DGC.MolecularType.NJMU.pdf
##   MutationBurden.compare.IGC_DGC.MolecularType.OncoSG.pdf
##   MutationBurden.compare.IGC_DGC.MolecularType.TCGA.pdf
${Rscript} "${scripts_path}/plot/mutBurden.MolecularType.R" \
  --input_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv" \
  --out_path "${work_dir}/finalPlot/molecular_type/mutBurden"


##########################################
# Disabled section, kept for reference only (marked "unused" by the author):
# forest plots comparing the effect of smoking, drinking and HP on mutation
# burden (Supplementary Fig. 3), i.e. a logistic regression per molecular type
# followed by a forest-plot visualisation.
# The text is fed to the no-op ':' as a here-document so none of it runs; the
# quoted 'EOF' delimiter also keeps the shell from expanding the ${...}
# references inside it.
: <<'EOF'
## 未用
## 森林图比较吸烟、饮酒和HP对突变负荷的影响，对应的Supplyment Fig3
molecular_type_list=("All" "GS" "CIN")
for molecular_type in ${molecular_type_list[@]}
do
${Rscript} ${scripts_path}/finalPlot/DescribeMutBurden.logistic.R \
--info_file ${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv \
--molecular_type ${molecular_type} \
--images_path ${work_dir}/finalPlot/molecular_type/logistic

## logistic回归,森林图可视化
${Rscript} ${scripts_path}/finalPlot/DescribeMutBurdenRatio.logistic.plot.R \
--input_burden_file ${work_dir}/finalPlot/molecular_type/logistic/baseInfo_mutBurden.${molecular_type}.tsv \
--molecular_type ${molecular_type} \
--out_path ${work_dir}/finalPlot/molecular_type/logistic
done
EOF

## 2. TMB of IM, IGC and DGC versus smoking, drinking and HP
##    (Supplementary Fig. 3).
## The drinking result corresponds to Figure 3d.
${Rscript} "${scripts_path}/mutBurden/mutBurden_plot.Baseline.R" \
  --input_file "${baseTable_path}/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
  --images_path "${work_dir}/finalPlot/molecular_type/mutBurden/baseline"

##########################################
## Mutation burden of paired IM, IGC and DGC samples from the same patient.
## Only MSS patients are plotted, excluding the 5 cases that have both IGC and
## DGC (Figure 3a).
## Expected output: mutBurden.Type.IM_IGC-IM_DGC.MSS.pdf
${Rscript} "${scripts_path}/mutBurden/mutBurden_plot.addMSI.R" \
  --info_file "${config_path}/STAD-useCombine.Sample.tsv" \
  --maf_file "${maf_path}/All_ForMutBurden.extract.maf" \
  --maf_msi_file "${maf_path}/All_ForMutBurden.extract.MSI.maf" \
  --images_path "${work_dir}/finalPlot/molecular_type/mutBurden"


##########################################
## Mutation burden of IM in the MSI, GS and CIN subtypes (left part of
## Figure 3c).
## Expected output: mutBurden.IM.TCGA_Type.GS_CIN_MSI.All.All.pdf
${Rscript} "${scripts_path}/mutBurden/mutBurden_plot.IM_MolecularType.R" \
  --info_file "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
  --images_path "${work_dir}/finalPlot/molecular_type/mutBurden"


##########################################
## CCF analysis of the shared mutations of IM in the MSI, GS and CIN subtypes
## (right part of Figure 3c and right part of Figure 3d).
## Expected output: mutBurden.IM.TCGA_Type.GS_CIN_MSI.MutCCFRate.Share.All_Drink.pdf
## Also compares mutation-burden differences by smoking, drinking and HP
## infection.
${Rscript} "${scripts_path}/plot/mutBurden.MolecularType.ShareRate.R" \
  --mutshare_file "${Images_path}/mutRate/MutShare.AllPoint.tsv" \
  --mutshare_msi_file "${Images_path}/mutRateMSI/MutShare.AllPoint.tsv" \
  --ccf_file "${MutationTime_path}/result/All_CCF_mutTime.tsv" \
  --ccf_msi_file "${MutationTime_path}/result/All_CCF_mutTime.MSI.tsv" \
  --input_file "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
  --images_path "${work_dir}/finalPlot/molecular_type/mutBurden/baseline"


##########################################
## Mutational signatures.
## De novo extraction of 4 mutational signatures (Supplementary Fig. 3a):
## sigProfiler/extractor_allUSE_AllMut_MSS/decompose/SBS96/De_Novo_Solution/Signatures/SBS_96_plots_De_Novo.pdf
sh "${scripts_path}/module/sigprofile_AllVcf.AllMut.sh"

## Compute, for every sample, the proportion of each mutational signature.
## Expected output: combine_SBS96.ratio.addInfo.tsv
${Rscript} "${scripts_path}/sigprofile/Sigprofiler_decompose_plot_v2.GetRatio.AllMut.R" \
  --sig_msi_file "${SigProfiler_path}/decompose_allUSE_AllMut_MSI/combine_SBS96.txt" \
  --sig_mss_file "${SigProfiler_path}/decompose_allUSE_AllMut_MSS/combine_SBS96.txt" \
  --info_file "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
  --info_public_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv" \
  --images_path "${work_dir}/finalPlot/molecular_type/decompose_allUSE_AllMut"

## Signature composition of shared vs. private mutations and of IGC vs. DGC,
## also drawn per study source. Both scripts are run once per molecular type.
mol_type_list=("MSS" "CIN" "GS")
for mol_type in "${mol_type_list[@]}"; do
  ## NJMU only: signature composition of share, trunk and private mutations
  ## (Supplementary Fig. 3b).
  ## Expected output: Mutation_Signature.decompose.NJMU.MSS.pdf
  ${Rscript} "${scripts_path}/sigprofile/Sigprofiler_decompose_plot_v2.AllVcf.Share.R" \
    --input_file "${work_dir}/finalPlot/molecular_type/decompose_allUSE_AllMut/combine_SBS96.ratio.addInfo.tsv" \
    --mol_type "${mol_type}" \
    --images_path "${work_dir}/finalPlot/molecular_type/decompose_allUSE_AllMut"

  ## All samples, overall and per molecular subtype: signature composition of
  ## IGC vs DGC (Supplementary Fig. 4c and 4d).
  ## Mutation_Signature.decompose.NJMU.MSS.pdf, Mutation_Signature.decompose.NJMU.MSS.tsv
  ## Mutation_Signature.decompose.NJMU.GS.pdf, Mutation_Signature.decompose.NJMU.GS.tsv
  ## Mutation_Signature.decompose.NJMU.CIN.pdf, Mutation_Signature.decompose.NJMU.CIN.tsv
  ${Rscript} "${scripts_path}/sigprofile/Sigprofiler_decompose_plot_v2.AllVcf.From.R" \
    --input_file "${work_dir}/finalPlot/molecular_type/decompose_allUSE_AllMut/combine_SBS96.ratio.addInfo.tsv" \
    --mol_type "${mol_type}" \
    --images_path "${work_dir}/finalPlot/molecular_type/decompose_allUSE_AllMut"
done

## Test the effect of drinking on the mutational signatures of GS-subtype IM.
${Rscript} "${scripts_path}/sigprofile/Sigprofiler_decompose_plot_v2.AllVcf.drink.R" \
  --input_file "${work_dir}/finalPlot/molecular_type/decompose_allUSE_AllMut/combine_SBS96.ratio.addInfo.tsv" \
  --mol_type GS \
  --images_path "${work_dir}/finalPlot/molecular_type/decompose_allUSE_AllMut"

##########################################
#### CNV comparison
## CNV distribution plot for IM, IGC and DGC (Supplementary Fig. 4):
##   CopyRatioDis.pdf
## CNV distribution by molecular subtype in NJMU and TCGA, split into IGC and
## DGC (Supplementary Fig. 6):
##   CopyRatioDis.IGC.NJMU.pdf
##   CopyRatioDis.DGC.NJMU.pdf
##   CopyRatioDis.IGC.TCGA.pdf
##   CopyRatioDis.DGC.TCGA.pdf
${Rscript} "${scripts_path}/plot/cnvDistribution_plot.combine.R" \
  --sample_file "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
  --seg_file "${Titan_path}/Titan_all_seg.final.tsv" \
  --sample_public_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv" \
  --seg_public_file "${work_dir}/seg_public/TCGA_use.seg" \
  --class_order_file "${config_path}/Class_order.list" \
  --images_path "${work_dir}/finalPlot/molecular_type/cnv_burden"

## CNV alterations of IGC and DGC per molecular subtype in NJMU and TCGA
## (Figure 2c):
##   CNV_burden.IGC_DGC.All.pdf
##   CNV_burden.IGC_DGC.CIN.All.pdf
##   CNV_burden.IGC_DGC.GS.All.pdf
##   CNV_burden.IGC_DGC.MSI.All.pdf
# One run per data source selection passed through --from.
from_type=("All" "NJMU" "TCGA")
for from in "${from_type[@]}"; do
  ${Rscript} "${scripts_path}/plot/cnvBurden_plot.combine.R" \
    --from "${from}" \
    --sample_file "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
    --seg_file "${Titan_path}/Titan_all_seg.final.tsv" \
    --sample_public_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv" \
    --seg_public_file "${work_dir}/seg_public/TCGA_use.seg" \
    --class_order_file "${config_path}/Class_order.list" \
    --images_path "${work_dir}/finalPlot/molecular_type/cnv_burden"
done

## Burden comparison for the paired IM-IGC and paired IM-DGC samples in NJMU
## (Figure 3b).
## Expected output: CNV_burden.IM_GC.All.pdf
${Rscript} "${scripts_path}/plot/cnvBurden_plot.IM.R" \
  --sample_file "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
  --seg_file "${Titan_path}/Titan_all_seg.final.tsv" \
  --class_order_file "${config_path}/Class_order.list" \
  --images_path "${work_dir}/finalPlot/molecular_type/cnv_burden"


####################################################################################
#### Symbolic links into Plot_Article/Part1
## Schematic ("mode") figures
# The three *.mode.R scripts in the schematic-figure section of this file are
# all given ${work_dir}/finalPlot/study_design as their output directory, so
# the links take their sources from there. The previous sources
# (molecular_type/mutBurden, molecular_type/mutBurden/baseline, SMG_Waterfull)
# are not written by any mode script visible in this file.
# NOTE(review): file names are taken from the comments next to those commands;
# confirm that the R scripts write directly into the directory they are given.
ln -snf "${work_dir}/finalPlot/study_design/MutationBurden.compare.IGC_DGC.MolecularType.All.mode.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.IGC_DGC.MolecularType.mode.pdf"
ln -snf "${work_dir}/finalPlot/study_design/mutBurden.Alcohol.cds.IM.MolType.mode.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.IM.TCGA_Type.Alcohol.mode.pdf"
ln -snf "${work_dir}/finalPlot/study_design/Mut_WaterFall.new.mode.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.waterfull_smg.mode.pdf"

## CNV of the molecular subtypes, NJMU vs TCGA
ln -snf "${work_dir}/finalPlot/molecular_type/cnv_burden/CopyRatioDis.DGC.NJMU.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.cnv_njmu.DGC.pdf"
ln -snf "${work_dir}/finalPlot/molecular_type/cnv_burden/CopyRatioDis.IGC.NJMU.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.cnv_njmu.IGC.pdf"
ln -snf "${work_dir}/finalPlot/molecular_type/cnv_burden/CopyRatioDis.DGC.TCGA.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.cnv_tcga.DGC.pdf"
ln -snf "${work_dir}/finalPlot/molecular_type/cnv_burden/CopyRatioDis.IGC.TCGA.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.cnv_tcga.IGC.pdf"

## Mutation burden of IGC and DGC in microsatellite-stable (MSS) samples
ln -snf "${work_dir}/finalPlot/molecular_type/mutBurden/MutationBurden.combine.IGC_DGC.MSS.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.IGC_DGC.pdf"
ln -snf "${work_dir}/finalPlot/molecular_type/mutBurden/MutationBurden.combine.IGC_DGC.MSS.tsv" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.IGC_DGC.tsv"
## By data source
ln -snf "${work_dir}/finalPlot/molecular_type/mutBurden/MutationBurden.compare.IGC_DGC.MSS.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.IGC_DGC.From.pdf"

## Mutation burden by molecular subtype
ln -snf "${work_dir}/finalPlot/molecular_type/mutBurden/MutationBurden.compare.IGC_DGC.MolecularType.All.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.IGC_DGC.MolecularType.All.pdf"
ln -snf "${work_dir}/finalPlot/molecular_type/mutBurden/MutationBurden.compare.IGC_DGC.MolecularType.All.tsv" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.IGC_DGC.MolecularType.All.tsv"
## By data source: NJMU
ln -snf "${work_dir}/finalPlot/molecular_type/mutBurden/MutationBurden.compare.IGC_DGC.MolecularType.NJMU.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.IGC_DGC.MolecularType.NJMU.pdf"
## By data source: OncoSG
ln -snf "${work_dir}/finalPlot/molecular_type/mutBurden/MutationBurden.compare.IGC_DGC.MolecularType.OncoSG.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.IGC_DGC.MolecularType.OncoSG.pdf"
## By data source: TCGA
ln -snf "${work_dir}/finalPlot/molecular_type/mutBurden/MutationBurden.compare.IGC_DGC.MolecularType.TCGA.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.IGC_DGC.MolecularType.TCGA.pdf"

## Forest plots showing the effect of baseline factors on mutation burden
# NOTE(review): the only commands in this file that write into
# molecular_type/logistic sit in the disabled here-document section, so these
# links will dangle unless the files exist from an earlier run - confirm.
ln -snf "${work_dir}/finalPlot/molecular_type/logistic/baseInfo_coef.All.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.baseInfo_coef.pdf"
ln -snf "${work_dir}/finalPlot/molecular_type/logistic/baseInfo_coef.All.tsv" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.baseInfo_coef.tsv"
## Per molecular subtype
ln -snf "${work_dir}/finalPlot/molecular_type/logistic/baseInfo_coef.GS.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.baseInfo_coef.GS.pdf"
ln -snf "${work_dir}/finalPlot/molecular_type/logistic/baseInfo_coef.GS.tsv" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.baseInfo_coef.GS.tsv"
ln -snf "${work_dir}/finalPlot/molecular_type/logistic/baseInfo_coef.CIN.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.baseInfo_coef.CIN.pdf"
ln -snf "${work_dir}/finalPlot/molecular_type/logistic/baseInfo_coef.CIN.tsv" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.baseInfo_coef.CIN.tsv"


## Relation of the molecular subtypes of IM, IGC and DGC to smoking, drinking
## and HP.
# Link every baseline output whose name contains ".cds.", skipping names that
# contain "IM", "Age" or "Gender". A glob is used instead of parsing `ls`, and
# ".cds." is matched literally.
for file in "${work_dir}/finalPlot/molecular_type/mutBurden/baseline/"*.cds.*; do
  # An unmatched glob stays as the literal pattern; skip it.
  [[ -e "${file}" || -L "${file}" ]] || continue
  file=${file##*/}
  case "${file}" in
    *IM* | *Age* | *Gender*) continue ;;
  esac
  echo "${file}"
  ln -snf "${work_dir}/finalPlot/molecular_type/mutBurden/baseline/${file}" \
    "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.${file}"
done


## TMB of paired IM samples from the same patient
ln -snf "${work_dir}/finalPlot/molecular_type/mutBurden/mutBurden.Type.IM_IGC-IM_DGC.MSS.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.IM_IGC-IM_DGC.pdf"
ln -snf "${work_dir}/finalPlot/molecular_type/mutBurden/mutBurden.Type.IM_IGC-IM_DGC.MSS.tsv" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.IM_IGC-IM_DGC.tsv"
## Comparison between the different IM groups
ln -snf "${work_dir}/finalPlot/molecular_type/mutBurden/mutBurden.Type.IM.MSS.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.IM_Compare.pdf"

## TMB of IM by molecular subtype
ln -snf "${work_dir}/finalPlot/molecular_type/mutBurden/mutBurden.IM.TCGA_Type.GS_CIN_MSI.All.All.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.IM.TCGA_Type.GS_CIN_MSI.All.pdf"
ln -snf "${work_dir}/finalPlot/molecular_type/mutBurden/mutBurden.IM.TCGA_Type.GS_CIN_MSI.All.All.tsv" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.IM.TCGA_Type.GS_CIN_MSI.All.tsv"
## IM by molecular subtype within each gastric cancer type
ln -snf "${work_dir}/finalPlot/molecular_type/mutBurden/mutBurden.IM.TCGA_Type.GS_CIN_MSI.All.IGC_DGC.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.IM.TCGA_Type.GS_CIN_MSI.IGC_DGC.pdf"

## IM mutation burden versus drinking
ln -snf "${work_dir}/finalPlot/molecular_type/mutBurden/baseline/mutBurden.Alcohol.cds.IM.MolType.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.IM.TCGA_Type.Alcohol.pdf"
ln -snf "${work_dir}/finalPlot/molecular_type/mutBurden/baseline/mutBurden.Alcohol.cds.IM.MolType.tsv" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.IM.TCGA_Type.Alcohol.tsv"

## CCF share of the shared mutations of IM, by molecular subtype
ln -snf "${work_dir}/finalPlot/molecular_type/mutBurden/baseline/mutBurden.IM.TCGA_Type.GS_CIN_MSI.MutCCFRate.Share.All_Drink.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.IM.TCGA_Type.MutCCFRate.Share.Alcohol.All_Drink.pdf"
ln -snf "${work_dir}/finalPlot/molecular_type/mutBurden/baseline/mutBurden.IM.TCGA_Type.GS_CIN_MSI.MutCCFRate.Share.Drink.tsv" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.IM.TCGA_Type.MutCCFRate.Share.Alcohol.Share.Drink.tsv"
## Supplementary figure
ln -snf "${work_dir}/finalPlot/molecular_type/mutBurden/baseline/mutBurden.IM.TCGA_Type.GS_CIN_MSI.MutCCFRate.Share.Non-Drink.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.IM.TCGA_Type.MutCCFRate.Share.Alcohol.Non-Drink.pdf"


## Mutational signature composition
ln -snf "${work_dir}/finalPlot/molecular_type/decompose_allUSE_AllMut/SBS_96_plots_SBS96_De-Novo.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.SBS_96_plots_SBS96_De-Novo.pdf"
## Overall mutational signatures, by data source
ln -snf "${work_dir}/finalPlot/molecular_type/decompose_allUSE_AllMut/Mutation_Signature.decompose.Combine.MSS.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.Mutation_Signature.decompose.Combine.MSS.pdf"
## Signatures of shared and private mutations in NJMU
ln -snf "${work_dir}/finalPlot/molecular_type/decompose_allUSE_AllMut/Mutation_Signature.decompose.NJMU.MSS.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.Mutation_Signature.decompose.NJMU.MSS.pdf"
## Per molecular subtype
ln -snf "${work_dir}/finalPlot/molecular_type/decompose_allUSE_AllMut/Mutation_Signature.decompose.Combine.GS.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.Mutation_Signature.decompose.GS.pdf"
ln -snf "${work_dir}/finalPlot/molecular_type/decompose_allUSE_AllMut/Mutation_Signature.decompose.Combine.CIN.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.Mutation_Signature.decompose.CIN.pdf"
ln -snf "${work_dir}/finalPlot/molecular_type/decompose_allUSE_AllMut/Mutation_Signature.decompose.Combine.MSI.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.Mutation_Signature.decompose.MSI.pdf"

## Copy-number distribution plot
ln -snf "${work_dir}/finalPlot/molecular_type/cnv_burden/CopyRatioDis.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.CopyRatioDis.pdf"
## Copy-number alteration burden
ln -snf "${work_dir}/finalPlot/molecular_type/cnv_burden/CNV_burden.IGC_DGC.All.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.CNV_burden.IGC_DGC.All.pdf"
ln -snf "${work_dir}/finalPlot/molecular_type/cnv_burden/CNV_Burden_ALL.All.tsv" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.CNV_burden.IGC_DGC.All.tsv"

## CNV burden per molecular subtype
# The CIN pdf link used to be created twice with identical arguments; it is
# now created once.
ln -snf "${work_dir}/finalPlot/molecular_type/cnv_burden/CNV_burden.IGC_DGC.CIN.All.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.CNV_burden.IGC_DGC.CIN.All.pdf"
ln -snf "${work_dir}/finalPlot/molecular_type/cnv_burden/CNV_Burden_ALL.All.Molecular.subtype.tsv" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.CNV_burden.IGC_DGC.CIN.All.tsv"

ln -snf "${work_dir}/finalPlot/molecular_type/cnv_burden/CNV_burden.IGC_DGC.GS.All.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.CNV_burden.IGC_DGC.GS.All.pdf"
ln -snf "${work_dir}/finalPlot/molecular_type/cnv_burden/CNV_burden.IGC_DGC.MSI.All.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Supplementary_Fig.CNV_burden.IGC_DGC.MSI.All.pdf"

## Copy-number comparison for IM
ln -snf "${work_dir}/finalPlot/molecular_type/cnv_burden/CNV_burden.IM_GC.All.pdf" \
  "${work_dir}/finalPlot/Plot_Article/Part1/Figure1.CNV_burden.IM.pdf"


##########################################
## Transcriptome part: 53 patients, 198 samples in total
export rsme_path=~/20220915_gastric_multiple/rna_combine/RSEM/

## A dedicated tumor_normal.list is used here that only contains samples from
## the 53 cases.
# Write a one-column file: the header "sample", then the unique values of
# columns 2 and 3 of the list (header row skipped, "#N/A" entries dropped).
{
  echo "sample"
  sed '1d' "${mRNA_path}/newAnalyse/tumor_normal.list" \
    | awk -F'\t' '{print $2,$3}' | tr ' ' '\n' | sort -u | grep -v "#N/A"
} > "${mRNA_path}/newAnalyse/patients.csv"

#########################################
## TPM
## Merge the per-sample results into one expression matrix.
${Rscript} "${scripts_path}/mRNA_addanalyse/CollapseGeneTPM.R" \
  --sample_list_file "${mRNA_path}/newAnalyse/patients.csv" \
  --rsem_path "${rsme_path}" \
  --out_file "${mRNA_path}/newAnalyse/CombineTPM.tsv"

## Merge samples of the same type from the same patient, remove low-expression
## genes and annotate Hugo_Symbol.
## Only Normal, IM, IGC and DGC are kept (mixed samples are removed).
${Rscript} "${scripts_path}/mRNA_addanalyse/mergeClassTpm.R" \
  --sample_list_file "${mRNA_path}/newAnalyse/tumor_normal.list" \
  --gtf_file "${ref_path}/GTF/gencode.v19.ensg_genename.txt" \
  --rsem_file "${mRNA_path}/newAnalyse/CombineTPM.tsv" \
  --out_file "${mRNA_path}/newAnalyse/CombineTpm.FilterLowExpression-MergeMutiSample.tsv"

###########################################
#### Counts, used for the differential expression analysis
#### Merge the per-sample results into one matrix.
${Rscript} "${scripts_path}/mRNA_addanalyse/CollapseGeneCounts.R" \
  --sample_list_file "${mRNA_path}/newAnalyse/patients.csv" \
  --rsem_path "${rsme_path}" \
  --out_file "${mRNA_path}/newAnalyse/CombineCounts.tsv"

## Merge the counts of samples of the same type from the same patient and
## annotate Hugo_Symbol.
## Genes with low TPM expression are removed (using the filtered TPM matrix).
## Only Normal, IM, IGC and DGC are kept.
${Rscript} "${scripts_path}/mRNA_addanalyse/mergeClassCounts.R" \
  --sample_list_file "${mRNA_path}/newAnalyse/tumor_normal.list" \
  --gtf_file "${ref_path}/GTF/gencode.v19.ensg_genename.txt" \
  --rsem_file "${mRNA_path}/newAnalyse/CombineCounts.tsv" \
  --filter_tpm_file "${mRNA_path}/newAnalyse/CombineTpm.FilterLowExpression-MergeMutiSample.tsv" \
  --out_file "${mRNA_path}/newAnalyse/CombineCounts.FilterLowExpression-MergeMutiSample.tsv"

#####################################################################################
### Visualisation of differential genes: IM vs Normal, IGC vs Normal and
### DGC vs Normal
export foldchange_t=1.5
export q_t=0.05

## Supplementary Fig. 9
## For each subtype: differential expression between IGC and DGC, with heatmap
## and pathway-enrichment visualisation.
# NOTE(review): --rsem_file points into finalPlot/mrna/DiffGene, which this
# file only passes as an output directory to diffGene_compute_v3.R further
# down; confirm that the TMM matrix already exists when this loop runs.
for subtype in All CIN GS MSI; do
  echo "${subtype}"
  ${Rscript_clusterProfiler} "${scripts_path}/mRNA_addanalyse/diffGene_compute_match_IGC_DGC.R" \
    --sample_list_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv" \
    --gtf_file "${ref_path}/GTF/gencode.v19.ensg_genename.txt" \
    --out_path "${work_dir}/finalPlot/mrna/DiffGene/IGC_DGC/${subtype}" \
    --rsem_file "${work_dir}/finalPlot/mrna/DiffGene/CombineCounts.FilterLowExpression-MergeMutiSample.TMM.tsv" \
    --foldchange_t "${foldchange_t}" \
    --q_t "${q_t}" \
    --subtype "${subtype}"
done

#####################################################################################
## Supplementary Fig. 11
## Time-series analysis of the Normal-IM-IGC and Normal-IM-DGC samples.
## IGC paired samples and DGC paired samples are analysed separately.

export foldchange_t=1.5
export q_t=0.05

## Differential expression
${Rscript} "${scripts_path}/mRNA_addanalyse/diffGene_compute_v3.R" \
  --sample_list_file "${mRNA_path}/newAnalyse/tumor_normal.list" \
  --gtf_file "${ref_path}/GTF/gencode.v19.ensg_genename.txt" \
  --out_path "${work_dir}/finalPlot/mrna/DiffGene" \
  --rsem_file "${mRNA_path}/newAnalyse/CombineCounts.FilterLowExpression-MergeMutiSample.tsv"

## Time-series analysis on the selected differentially expressed genes
${Rscript_expressionTime} "${scripts_path}/mRNA_addanalyse/time_Mfuzz_v2.R" \
  --sample_list_file "${mRNA_path}/newAnalyse/tumor_normal.list" \
  --gtf_file "${ref_path}/GTF/gencode.v19.ensg_genename.txt" \
  --out_path "${work_dir}/finalPlot/mrna/mfuzz_v2" \
  --diff_file "${work_dir}/finalPlot/mrna/DiffGene/DiffGene.tsv" \
  --foldchange_t "${foldchange_t}" \
  --q_t "${q_t}" \
  --rsem_file "${work_dir}/finalPlot/mrna/DiffGene/CombineCounts.FilterLowExpression-MergeMutiSample.TMM.tsv"

## 通路富集
## Normal->IM->IGC
${Rscript_clusterProfiler} ${scripts_path}/mRNA_addanalyse/mfuzz_cluster_gobp.R \
--diff_exp_file ${work_dir}/finalPlot/mrna/mfuzz_v2/mfuzz_plot_IGC.tsv \
--out_path ${work_dir}/finalPlot/mrna/mfuzz_v2/IGC

## Normal->IM->DGC
${Rscript_clusterProfiler} ${scripts_path}/mRNA_addanalyse/mfuzz_cluster_gobp.R \
--diff_exp_file ${work_dir}/finalPlot/mrna/mfuzz_v2/mfuzz_plot_DGC.tsv \
--out_path ${work_dir}/finalPlot/mrna/mfuzz_v2/DGC


####################################################################################
## Part 2
## Significantly mutated genes (SMG)
####################################################################################
## Previously reported significantly mutated genes
mkdir -p ${work_dir}/finalPlot/SMG_Waterfull/

## Each *.addEvolution.list starts out as a plain copy of the matching SMG list
for smg_prefix in smg igc_smg dgc_smg; do
  cp ${mutsig_check_path}/${smg_prefix}.list ${mutsig_check_path}/${smg_prefix}.addEvolution.list
done

## All_driver.list = header line + sorted, de-duplicated union of the DGC, IGC and IM gene lists
## (every line containing "Gene", i.e. the per-file headers, is dropped from the union)
{
  echo "Gene_Symbol"
  cat ${mutsig_check_path}/dgc_smg.addEvolution.list \
      ${mutsig_check_path}/igc_smg.addEvolution.list \
      ${mutsig_check_path}/im_smg.list \
    | grep -v Gene \
    | sort -u
} > ${mutsig_check_path}/All_driver.list

##########################################
## How many IM, IGC and DGC samples carry a driver-gene mutation.
## For each group this prints the number of non-MSI rows of the sample table and the
## number of distinct values of MAF column 3 among the rows that mention a driver
## gene, a protein-altering variant class and one of the group's sample IDs.
## NOTE(review): column 3 is presumably the sample identifier of these MAF files - confirm.
info_file=${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv

## "A|B|C" alternation of all driver genes (header line removed)
driverGene=$(grep -v Gene_Symbol "${mutsig_check_path}/smg.list" | tr '\n' '|' | sed 's/|$//')
Variant_Type="Missense_Mutation|Nonsense_Mutation|Frame_Shift_Ins|Frame_Shift_Del|In_Frame_Ins|In_Frame_Del|Splice_Site|Nonstop_Mutation"

## Join column 1 of the rows held in $1 into an "a|b|c" alternation (empty for no rows).
_join_sample_ids() {
  [[ -n "$1" ]] || return 0
  printf '%s\n' "$1" | awk -F'\t' '{print $1}' | tr '\n' '|' | sed 's/|$//'
}

## Number of rows held in $1 (0 for an empty string).
_count_rows() {
  if [[ -z "$1" ]]; then
    echo 0
  else
    printf '%s\n' "$1" | wc -l | tr -d ' '
  fi
}

## Count distinct column-3 values of MAF file $1 among rows matching the driver genes,
## the variant classes and the sample alternation $2.  An empty pattern counts as 0
## instead of being handed to grep as a missing/empty expression.
_count_mutated_samples() {
  local maf_file=$1 sample_regex=$2
  if [[ -z "${driverGene}" || -z "${sample_regex}" ]]; then
    echo 0
    return 0
  fi
  grep -E -w -- "${driverGene}" "${maf_file}" \
    | grep -E -w -- "${Variant_Type}" \
    | grep -E -w -- "${sample_regex}" \
    | awk -F'\t' '{print $3}' | sort -u | wc -l | tr -d ' '
}

## Select the rows of each group once; IDs and counts are both derived from them.
igc_rows=$(grep -v MSI "${info_file}" | grep IGC | grep -v DGC)
dgc_rows=$(grep -v MSI "${info_file}" | grep DGC | grep -v IGC)
im_rows=$(grep -v MSI "${info_file}" | grep NJMU | grep -v "IGC + DGC")

igc_sample=$(_join_sample_ids "${igc_rows}")
dgc_sample=$(_join_sample_ids "${dgc_rows}")
im_sample=$(_join_sample_ids "${im_rows}")

igc_sample_num=$(_count_rows "${igc_rows}")
dgc_sample_num=$(_count_rows "${dgc_rows}")
im_sample_num=$(_count_rows "${im_rows}")

igc_mut_num=$(_count_mutated_samples "${work_dir}/maf_public/All_use.maf" "${igc_sample}")
dgc_mut_num=$(_count_mutated_samples "${work_dir}/maf_public/All_use.maf" "${dgc_sample}")
im_mut_num=$(_count_mutated_samples "${work_dir}/maf_public/All_use.IM.maf" "${im_sample}")

## Tab-separated report: a header line followed by the two numbers of each group
printf 'igc_sample_num\tigc_mut_num\n'
printf '%s\t%s\n' "${igc_sample_num}" "${igc_mut_num}"

printf 'dgc_sample_num\tdgc_mut_num\n'
printf '%s\t%s\n' "${dgc_sample_num}" "${dgc_mut_num}"

printf 'im_sample_num\tim_mut_num\n'
printf '%s\t%s\n' "${im_sample_num}" "${im_mut_num}"

##########################################
## Describe how the 20 driver genes are mutated in the different studies
### Mutation rates of the 20 genes in IGC and DGC across the studies

## Mutation-rate computation (IM, GC, IGC, DGC); checked against Figure 4, the results agree
## Output: MutRate.tsv
${Rscript} ${scripts_path}/mutsig/mutRate_compute.R \
--maf_cancer_file ${maf_public_path}/All_use.maf \
--maf_im_file ${maf_public_path}/All_use.IM.maf \
--images_path ${mutsig_check_path}/ \
--info_file ${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv

## Mutation rates of the 20 genes in IGC and DGC across the studies
## Supplementary Fig12, panel A
# mutRate_byGroup_DGC.pdf
# mutRate_byGroup_IGC.pdf
${Rscript} ${scripts_path}/mutsig/mutRate_compare_fiveCohort.R \
--mutRate_file ${mutsig_check_path}/MutRate.tsv \
--driver_list ${mutsig_check_path}/All_driver.list \
--out_path ${work_dir}/finalPlot/SMG_Waterfull

##########################################
## Describe how the 20 driver genes are mutated in the different studies
## Mutation rates stratified by baseline characteristics: smoking, alcohol, H. pylori
## Supplementary Fig13
## Output: MutRate_baseline.tsv
${Rscript} ${scripts_path}/finalPlot/mutRate_compute_baseline.R \
--maf_cancer_file ${maf_public_path}/All_use.maf \
--maf_im_file ${maf_public_path}/All_use.IM.maf \
--images_path ${work_dir}/finalPlot/SMG_Waterfull/mutRate_forbaseline \
--info_file ${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv

## The 20 genes by baseline characteristic
# Smoking
${Rscript} ${scripts_path}/finalPlot/mutRate_plot.Tobacco.R \
--mut_rate_gene_file ${work_dir}/finalPlot/SMG_Waterfull/mutRate_forbaseline/MutRate_baline_Tobacco.tsv \
--smg_file ${mutsig_check_path}/All_driver.list \
--images_path ${work_dir}/finalPlot/SMG_Waterfull/mutRate_forbaseline/smoking

# Alcohol
${Rscript} ${scripts_path}/finalPlot/mutRate_plot.Alcohol.R \
--mut_rate_gene_file ${work_dir}/finalPlot/SMG_Waterfull/mutRate_forbaseline/MutRate_baline_Alcohol.tsv \
--smg_file ${mutsig_check_path}/All_driver.list \
--images_path ${work_dir}/finalPlot/SMG_Waterfull/mutRate_forbaseline/Alcohol

# H. pylori
${Rscript} ${scripts_path}/finalPlot/mutRate_plot.HP.R \
--mut_rate_gene_file ${work_dir}/finalPlot/SMG_Waterfull/mutRate_forbaseline/MutRate_baline_HP.tsv \
--smg_file ${mutsig_check_path}/All_driver.list \
--images_path ${work_dir}/finalPlot/SMG_Waterfull/mutRate_forbaseline/HP

##########################################
## Mutation waterfall plot, Figure 4a
## Output: Mut_WaterFall.new.pdf
${Rscript} ${scripts_path}/plot/waterfull_smg.new_v2.R \
--maf_path ${maf_public_path} \
--images_path ${work_dir}/finalPlot/SMG_Waterfull/ \
--info_file ${config_path}/tumor_normal.class.list \
--info_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv \
--class_order_file ${config_path}/Class_order.list \
--im_list ${mutsig_check_path}/im_smg.list \
--igc_list ${mutsig_check_path}/igc_smg.list \
--dgc_list ${mutsig_check_path}/dgc_smg.list

##########################################
## Comparison of mutation rates, Figure 4b
## NOTE(review): MutRate.tsv is read here from ${Images_path}/mutRate/, while mutRate_compute.R
## earlier in this file is pointed at ${mutsig_check_path}/ - confirm this is the intended file.
from=All
${Rscript} ${scripts_path}/plot/mutRate_plot.All.R \
--smg_file ${mutsig_check_path}/smg.list \
--mut_rate_gene_file ${Images_path}/mutRate/MutRate.tsv \
--from ${from} \
--images_path ${work_dir}/finalPlot/SMG_Waterfull/

##########################################
## Co-occurrence and mutual exclusivity, evaluated separately for IGC and DGC (Supplementary Fig7a)
## Outputs: conOccurrence_exclussive.IGC.pdf
##          conOccurrence_exclussive.DGC.pdf
class_type_list=("IGC" "DGC")
for class_type in "${class_type_list[@]}"; do
  ## Step 1: compute the pairwise statistics
  ${Rscript} ${scripts_path}/plot/DriverGene.Exclusive.R \
    --maf_path ${maf_public_path} \
    --class_type ${class_type} \
    --images_path ${work_dir}/finalPlot/SMG_Waterfull/ \
    --info_file ${config_path}/tumor_normal.class.list \
    --info_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv \
    --class_order_file ${config_path}/Class_order.list \
    --im_list ${mutsig_check_path}/im_smg.list \
    --igc_list ${mutsig_check_path}/igc_smg.list \
    --dgc_list ${mutsig_check_path}/dgc_smg.list

  ## Step 2: draw the heat map from the table written for this class
  ${Rscript} ${scripts_path}/plot/heatMap.con_exclusive.R \
    --class_type ${class_type} \
    --input_file ${work_dir}/finalPlot/SMG_Waterfull/MutuallyExclusive.${class_type}.tsv  \
    --images_path ${work_dir}/finalPlot/SMG_Waterfull/
done


####################################################################################
## Part 3: clonal evolution
####################################################################################

mkdir -p ${work_dir}/finalPlot/SMG_Evolution/

##########################################
#### Mutation lollipop plots
## Within the NMU samples, compare where the mutations fall in paired IM and its IGC/DGC.
## Lollipop panels of Figure 5b/c/d and Supplementary Fig9, Fig11, Fig13-15.
## One plot per gene of smg.list (header removed, genes sorted and de-duplicated).
for gene in $(grep -v Gene_Symbol ${mutsig_check_path}/smg.list | sort -u); do
  ${Rscript} ${scripts_path}/plot/Lollipop_variant.R \
    --gene ${gene} \
    --pre_file ${maf_path}/All_GGA.precancer.maf \
    --cancer_file ${maf_path}/All_GGA.cancer.maf \
    --sample_info ${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv \
    --gtf_file ${ref_path}/GTF/gencode.v19.annotation.exonNum.gtf \
    --out_path ${Images_path}/lollipop/${gene}_NMU
done


##########################################
## Compute the CCF of every driver gene in every sample and decide whether it is clonal
## or subclonal; this step produces no figure for the article.
## show_gene.list = header line + sorted, de-duplicated union of the GC-clone genes and the SMGs.
## The pipeline is continued with a real backslash: the original line ended in the
## full-width character "、", which sort received as a file name, leaving the list empty.
echo "Gene_Symbol" > "${mutsig_check_path}/show_gene.list"
cat "${Images_path}/selectGCClone/GCClone_gene.all_record.list" "${mutsig_check_path}/smg.list" \
  | grep -v Gene | sort -u \
  >> "${mutsig_check_path}/show_gene.list"

## ${Rscript} stays unquoted on purpose: it may hold a command plus options.
${Rscript} "${scripts_path}/tree/GetDriverEverySample.R" \
  --ccf_file "${MutationTime_path}/result/All_CCF_mutTime.addShare.tsv" \
  --gene_list "${mutsig_check_path}/show_gene.list" \
  --sample_info "${config_path}/tumor_normal.class.MSS_MSI.list" \
  --out_path "${Images_path}/DriverEverySample" \
  --class_order_file "${config_path}/Class_order_sub.list"

##########################################
## Classify the driver genes as Maintained, IM favored or GC favored and label each as shared,
## IM-private or GC-private; IGC and DGC samples are treated separately (Figure 4c)
## Outputs: GeneTrunk.evolutionChoose.ratio.IGC.pdf
##          GeneTrunk.evolutionChoose.ratio.DGC.pdf
clone_t=0.6
class_type_list=("IGC" "DGC")
for class_type in "${class_type_list[@]}"; do
  ## Shared / private status of the driver genes for this class
  ${Rscript} ${scripts_path}/plot/JudgeGeneDriverSharePrivate.R \
    --muti_cancer ${maf_path}/All_GGA.cancer.maf \
    --muti_pre ${maf_path}/All_GGA.precancer.maf \
    --ccf_file ${MutationTime_path}/result/All_CCF_mutTime.tsv \
    --gene_list ${mutsig_check_path}/smg.list  \
    --type ${class_type} \
    --sample_info ${config_path}/tumor_normal.class.list \
    --clone_t ${clone_t} \
    --out_path ${work_dir}/finalPlot/SMG_Evolution/share_private
done

## Mutation rate of the GC-favored genes: print the number of distinct values of
## column 32 among the rows naming one of these genes and a protein-altering variant class
## NOTE(review): column 32 is presumably the sample/patient identifier - confirm against the table header.
gc_favored_list="ARID1A|ARID2|CTNNB1|ERBB2|FBXW7|MAP2K7|RHOA|RNF43|SMAD4|TGFBR2"
Variant_Types="Missense_Mutation|Nonsense_Mutation|Frame_Shift_Ins|Frame_Shift_Del|In_Frame_Ins|In_Frame_Del|Splice_Site|Nonstop_Mutation"

grep -E -w "${gc_favored_list}" ${MutationTime_path}/result/All_CCF_mutTime.addShare.rmMIX.tsv \
  | grep -E -w "${Variant_Types}" \
  | awk -F'\t' '{print $32}' \
  | sort -u \
  | wc -l


##########################################
## Clonal selection and CNV status of the Maintained and GC-favored GC driver genes in the
## 10 patients with a shared TP53 mutation; IGC and DGC samples shown separately (Figure 5a)
## Output: Mut_WaterFall.SortyBySample.Driver.pdf
${Rscript} ${scripts_path}/finalPlot/waterfull_smg.SortBySample.v1.R \
--ccf_file ${MutationTime_path}/result/All_CCF_mutTime.addShare.tsv \
--images_path ${work_dir}/finalPlot/SMG_Evolution/ \
--info_file ${config_path}/tumor_normal.class.list \
--class_order_file ${config_path}/Class_order.list \
--smg_list ${Images_path}/selectGCClone/GCClone_gene.all_record.list  \
--class_order_sub_file ${config_path}/Class_order_sub.list \
--tp53_pre_file ${Images_path}/lollipop/TP53_NMU/TP53.PreCancerous.UniqueNormal.tsv \
--tp53_cancer_file ${Images_path}/lollipop/TP53_NMU/TP53.Cancerous.UniqueNormal.tsv 


##########################################
## Trunk and private APC mutations: share with loss of the minor copy number, overall and
## in IGC and DGC (Supplementary Fig9a)
${Rscript} ${scripts_path}/finalPlot/JudgeGeneDriverLOH.Gene.R \
--geneN APC \
--ccf_file ${MutationTime_path}/result/All_CCF_mutTime.addShare.tsv \
--gene_list ${Images_path}/selectGCClone/GCClone_gene.all_record.list  \
--sample_info ${config_path}/tumor_normal.class.list \
--out_path ${work_dir}/finalPlot/SMG_Evolution/LOH


##########################################
## CDH1 expression for shared mutations, private mutations and wild type (Supplementary Fig9b)
${Rscript} ${scripts_path}/mRNA/showGene.Normalize.TrunkvsPrivate.R \
--sample_list_file ${config_path}/tumor_normal.class.list \
--mut_file ${MutationTime_path}/result/All_CCF_mutTime.addShare.tsv \
--rsem_file ${mRNA_path}/CombineTMM.DNAUse.NJMU_TCGA.tsv \
--out_path ${work_dir}/finalPlot/SMG_Evolution/expression \
--gene CDH1


##########################################
## CDH1 alternative-splicing visualisation (right-hand part of Figure 5c)
mkdir -p "${work_dir}/finalPlot/SMG_Evolution/cdh1"

## RNA sample sheet with a "<column1>_<column5>" lookup key appended as the last column
awk -F'\t' '{OFS="\t"}{print $0,$1"_"$5}' \
  ~/20220915_gastric_multiple/rna_combine/analysis/config/tumor_normal.list \
  > "${work_dir}/finalPlot/SMG_Evolution/cdh1/Tumor_Normal_RNA.tsv"

## 1. Extract the CDH1 region from the RNA BAM of every DGC sample (rows containing
##    "DGC-" and not "IGC") of the DNA class list.
for sample in $(grep DGC- "${config_path}/tumor_normal.class.list" | grep -v IGC | awk -F'\t' '{print $1"_"$5}'); do
  ## Look the RNA id (column 3) up by exact equality with the appended key column, so
  ## that e.g. "P1_DGC-1" no longer also picks up the rows of "P1_DGC-10".
  rna_id=$(awk -F'\t' -v key="${sample}" '$NF == key {print $3}' \
    "${work_dir}/finalPlot/SMG_Evolution/cdh1/Tumor_Normal_RNA.tsv" | sort -u)

  ## No RNA data for this sample: nothing to extract
  if [[ -z "${rna_id}" ]]; then
    continue
  fi

  ## Several RNA ids for one key would otherwise be spliced into one broken path
  if [[ "${rna_id}" == *$'\n'* ]]; then
    printf 'warning: several RNA ids match %s; using the first one\n' "${sample}" >&2
    rna_id=${rna_id%%$'\n'*}
  fi

  samtools view -hb ~/20220915_gastric_multiple/rna_combine/STAR/"${rna_id}".Aligned.sortedByCoord.out.bam 16:68771128-68867487 \
    > "${work_dir}/finalPlot/SMG_Evolution/cdh1/${sample}.cdh1.bam"
  samtools index "${work_dir}/finalPlot/SMG_Evolution/cdh1/${sample}.cdh1.bam"
done

## Region to display for CDH1
#16:68836036-68856036
## 2. In IGV, pick DGC002 (case) and DGC011 (control) and compare whether the splice-site
##    position shows alternative splicing

##########################################
## For each gene: the clonal-evolution tree of a sample carrying it and the gene's clonal selection
Variant_Types="Missense_Mutation|Nonsense_Mutation|Frame_Shift_Ins|Frame_Shift_Del|In_Frame_Ins|In_Frame_Del|Splice_Site|Nonstop_Mutation"

## Disabled section, kept for reference only (not shown in the final figures).
## The delimiter is quoted so that the shell neither expands ${...} nor runs the
## backtick commands inside the here-document, and `:` consumes the text explicitly.
: <<'EOF'
## 最后不展示
## TP53的对应的Figure5的a图
gene=TP53
## IGC和PIK3CA共存
Normal=JZGCWES731
id=`cat ${config_path}/tumor_normal.class.list | grep -w ${Normal} | awk -F'\t' '{print $1}' | sort -u`
ln -snf ${tree_path}/Tree_file/${Normal}_mlhtree.pdf ${work_dir}/finalPlot/SMG_Evolution/${gene}.${Normal}_mlhtree.pdf
ln -snf ${tree_path}/Tree_file/${Normal}_variants.csv ${work_dir}/finalPlot/SMG_Evolution/${gene}.${Normal}_variants.csv
ln -snf ${Images_path}/DriverEverySample/${id}_Driver.pdf ${work_dir}/finalPlot/SMG_Evolution/${gene}.${Normal}_Driver.pdf
## DGC和RHOA共存
Normal=JZGCWES0908
id=`cat ${config_path}/tumor_normal.class.list | grep -w ${Normal} | awk -F'\t' '{print $1}' | sort -u`
ln -snf ${tree_path}/Tree_file/${Normal}_mlhtree.pdf ${work_dir}/finalPlot/SMG_Evolution/${gene}.${Normal}_mlhtree.pdf
ln -snf ${tree_path}/Tree_file/${Normal}_variants.csv ${work_dir}/finalPlot/SMG_Evolution/${gene}.${Normal}_variants.csv
ln -snf ${Images_path}/DriverEverySample/${id}_Driver.pdf ${work_dir}/finalPlot/SMG_Evolution/${gene}.${Normal}_Driver.pdf
EOF

## Collect, for the example patients of each driver gene, symlinks to the clonal-evolution
## tree, its variant table and the per-sample driver plot under finalPlot/SMG_Evolution.
## Each entry is "<gene>:<normal sample name>"; the normal sample name selects the tree
## files and is looked up (whole-word match) in tumor_normal.class.list to obtain the
## column-1 id that names the driver plot.
evolution_cases=(
  ## APC, Figure 5b
  "APC:S63"
  "APC:JZGCWES0941"
  ## PIK3CA, Figure 5d
  "PIK3CA:JZ464B"
  "PIK3CA:JZGCWES1033"
  ## CDH1 in DGC, Figure 5c (this sample also carries SMAD4)
  "CDH1:JZ618B"
  ## CDH1 in an IGC sample, Supplementary Fig9
  "CDH1:JZGCWES0698"
  ## SMAD4, Supplementary Fig14a
  "SMAD4:JZ585B"
  ## FBXW7, Supplementary Fig14b
  "FBXW7:JZ553B"
  "FBXW7:JZGCWES1009N"
  ## RHOA, Supplementary Fig14c
  "RHOA:JZGCWES0908"
  ## Shared RNF43 and MAP2K7 mutations, Supplementary Fig15b
  "RNF43:JZ549B"
  "RNF43:JZ664B"
  ## Shared ARID1A mutation, Supplementary Fig15c
  "ARID1A:JZGCWES1011"
  ## Shared ERBB2 mutations, Supplementary Fig15d
  "ERBB2:JR8"
  "ERBB2:JZ539B"
  "ERBB2:JZ645B"
)

for evolution_case in "${evolution_cases[@]}"; do
  gene=${evolution_case%%:*}
  Normal=${evolution_case#*:}
  id=$(grep -w -- "${Normal}" "${config_path}/tumor_normal.class.list" | awk -F'\t' '{print $1}' | sort -u)

  ## -n: replace an existing link instead of descending into it; -f: overwrite
  ln -snf "${tree_path}/Tree_file/${Normal}_mlhtree.pdf" \
    "${work_dir}/finalPlot/SMG_Evolution/${gene}.${Normal}_mlhtree.pdf"
  ln -snf "${tree_path}/Tree_file/${Normal}_variants.csv" \
    "${work_dir}/finalPlot/SMG_Evolution/${gene}.${Normal}_variants.csv"

  ## Without exactly one id the driver plot cannot be named; warn instead of creating
  ## a link to "_Driver.pdf" or to a path spliced from several ids.
  if [[ -z "${id}" || "${id}" == *$'\n'* ]]; then
    printf 'warning: no unique id for %s in tumor_normal.class.list; %s driver plot not linked\n' \
      "${Normal}" "${gene}" >&2
    continue
  fi
  ln -snf "${Images_path}/DriverEverySample/${id}_Driver.pdf" \
    "${work_dir}/finalPlot/SMG_Evolution/${gene}.${Normal}_Driver.pdf"
done


####################################################################################
## Part 4: possible mechanisms behind the evolutionary selection of MUC6 in IM
####################################################################################
##########################################

mkdir -p ${work_dir}/finalPlot/MUC6_BMP6_CFTR 

##########################################
## MUC6: overall fraction of recurrent mutations, shared vs private, clonal vs subclonal (Figure 7b)
## Outputs: MUC6.Recurrent_Ratio.pdf
##          MUC6.Share_Private.pdf
##          MUC6.Share_Clone.pdf
##          MUC6.Driver_Con-occurence.pdf
${Rscript} ${scripts_path}/finalPlot/MUC6.recurrent.R \
--ccf_file ${MutationTime_path}/result/All_CCF_mutTime.addShare.tsv \
--sample_info ${config_path}/tumor_normal.class.list \
--out_path ${work_dir}/finalPlot/MUC6_BMP6_CFTR \
--gene_list_file ${Images_path}/selectGCClone/GCClone_gene.all_record.report.list

##########################################
## Overall single-cell clustering plot of the four patients, plus marker-gene feature plots.
## The last argument line deliberately has no trailing backslash: a dangling continuation
## would swallow the next line into this command as soon as the blank line after it is removed.
## ${Rscript_singlecell} stays unquoted on purpose: it may hold a command plus options.
${Rscript_singlecell} "${scripts_path}/singlecell/markergene_umap.R" \
  --singleCell_sample_file "${config_path}/singleCell_Sample.useThree.list" \
  --single_cell_file "${work_dir}/public_ref/singleCell/njmu/epiall_nor_PCA_50_RE0.5.Rdata" \
  --out_path "${work_dir}/finalPlot/MUC6_BMP6_CFTR/singleCellRatio"

##########################################
## Cell-type composition in IM of patients with and without a MUC6 mutation,
## as bar chart and as box/scatter plot (Figure 7c)
## Outputs: STAD_MUC6_mutwild.IM.bar.CellRate.pdf
##          STAD_MUC6_mutwild.IM.box.CellRate.pdf
gene=MUC6
${Rscript_singlecell} ${scripts_path}/singlecell/compare_CellRatio.R \
--sample_list_file ${config_path}/tumor_normal.class.MSS_MSI.rmMIX.list \
--singleCell_sample_file ${config_path}/singleCell_Sample.useThree.list \
--maf_file ${maf_path}/All_GGA.all.maf \
--maf_msi_file ${maf_path}/All_GGA.all.MSI.maf \
--single_cell_file ${work_dir}/public_ref/singleCell/njmu/epiall_nor_PCA_50_RE0.5.Rdata \
--out_path ${work_dir}/finalPlot/MUC6_BMP6_CFTR/singleCellRatio \
--gene ${gene}


##########################################
## In the samples carrying a MUC6 mutation, determine which cells are associated with the
## MUC6 mutation (Figure 7d-e)
## Outputs: Scissor_STAD_MUC6_mutation.IM.CellRate.all.Counts.pdf
##          Scissor_STAD_MUC6_mutation.IM.all.cell.pdf
## Runtime: about 1 day
${Rscript_singlecell} ${scripts_path}/singlecell/scissor_cell_OurData.MUC6.R \
--sample_list_file ${config_path}/tumor_normal.class.list \
--sample_list_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
--rsem_file ${mRNA_path}/CombineTPM.DNAUse.NJMU_TCGA.tsv \
--maf_public_file ${work_dir}/maf_public/All_use.addVAF.maf \
--single_cell_file ${work_dir}/public_ref/singleCell/njmu/epiall_nor_PCA_50_RE0.5.Rdata \
--out_path  ${work_dir}/finalPlot/MUC6_BMP6_CFTR/scissor \
--mut_type all \
--gene MUC6
## Plotting, from the table and RData file found in the scissor output directory
${Rscript_singlecell} ${scripts_path}/singlecell/scissor_cell_OurData.Counts.R \
--input_im_file ${work_dir}/finalPlot/MUC6_BMP6_CFTR/scissor/Scissor_STAD_MUC6_mutation.IM.CellRate.all.tsv \
--single_cell_file  ${work_dir}/finalPlot/MUC6_BMP6_CFTR/scissor/Scissor_STAD_MUC6_mutation.IM.CellRate.all.RData \
--out_path ${work_dir}/finalPlot/MUC6_BMP6_CFTR/scissor \
--type all \
--gene MUC6


##########################################
## Cell-cell interaction (Figure 7f)
## Runtime: about 12 h
## Outputs: IM_MUC6.heatmap.pdf
##          IM_MUC6.cpdb.pdf
## NOTE(review): the RData input is read from ${Images_path}/singleCell_MUC6/, whereas the
## Scissor step of this file writes to ${work_dir}/finalPlot/MUC6_BMP6_CFTR/scissor -
## confirm that both locations hold the same file.
${Rscript_cpbd} ${scripts_path}/singlecell/cellphonedb.R \
--single_cell_file ${Images_path}/singleCell_MUC6/Scissor_STAD_MUC6_mutation.IM.CellRate.all.RData \
--out_path ${Images_path}/singleCell_MUC6/cpdb
## Run cellphonedb (the Python script is called without arguments)
${python_cpbd} ${scripts_path}/singlecell/cellphonedb.py
## Visualisation of the p-value and mean tables
${Rscript_cpbd} ${scripts_path}/singlecell/ktplots.R \
--pvals_file ${Images_path}/singleCell_MUC6/cpdb/statistical_analysis_pvalues_IM_MUC6.txt \
--means_file ${Images_path}/singleCell_MUC6/cpdb/statistical_analysis_means_IM_MUC6.txt \
--out_path ${work_dir}/finalPlot/MUC6_BMP6_CFTR/cpdb

## Check the expression distribution of the genes of the observed DSC2-DSG2
## receptor-ligand interaction (Supplementary Fig13)
## Outputs: DSG2.vln.IM.pdf, DSG2.FeaturePlot.IM.pdf
##          DSC2.vln.IM.pdf, DSC2.FeaturePlot.IM.pdf
gene_list=( DSC2 DSG2 )
for gene in "${gene_list[@]}"; do
  ${Rscript_singlecell} ${scripts_path}/singlecell/showGene.vln.MUC6.R \
    --single_cell_file ${Images_path}/singleCell_MUC6/Scissor_STAD_MUC6_mutation.IM.CellRate.all.RData \
    --gene ${gene} \
    --out_path ${work_dir}/finalPlot/MUC6_BMP6_CFTR/cpdb
done

##########################################
## Differential expression between mutant-type and wild-type Pit cells (Figure 7g)
## Output: DiffGene.Pit.pdf
${Rscript_singlecell} ${scripts_path}/singlecell/differexpression.MUC6.R \
--single_cell_file ${work_dir}/finalPlot/MUC6_BMP6_CFTR/scissor/Scissor_STAD_MUC6_mutation.IM.CellRate.all.RData \
--gene MUC6 \
--cds_file ~/ref/PCAWG_Elements/web_hg19/gc19_pc.cds.use.bed \
--pathway_path ~/ref/Pathway/ \
--out_path ${work_dir}/finalPlot/MUC6_BMP6_CFTR/Diff
## Plot the differential expression only, and run the pathway enrichment at the same time
${Rscript_clusterProfiler} ${scripts_path}/singlecell/differexpression.MUC6.plot.R \
--input_file ${work_dir}/finalPlot/MUC6_BMP6_CFTR/Diff/DiffGene.Pit.tsv \
--gene MUC6 \
--out_path ${work_dir}/finalPlot/MUC6_BMP6_CFTR/Diff


## Fraction of Pit cells with high expression of both GKN1 and GKN2 (above the median over all
## Pit cells): does it differ between MUC6-mutation-associated Pit cells and the other Pit cells? (Figure 7h)
## Output: IM_Pit_HighExpressionCellRatio.GKN1_GKN2.combine.pdf
## NOTE(review): the scissor RData is read here from ${Images_path}/singleCell_MUC6/ although the
## differential-expression step above reads it from ${work_dir}/finalPlot/MUC6_BMP6_CFTR/scissor - confirm.
${Rscript_singlecell} ${scripts_path}/singlecell/compare_PitCellRatio.GKN1_GKN2.R \
--sample_list_file ${config_path}/tumor_normal.class.MSS_MSI.list \
--singleCell_sample_file ${config_path}/singleCell_Sample.useThree.list \
--single_cell_scissor_file ${Images_path}/singleCell_MUC6/Scissor_STAD_MUC6_mutation.IM.CellRate.all.RData \
--single_cell_file ${work_dir}/public_ref/singleCell/njmu/epiall_nor_PCA_50_RE0.5.Rdata \
--out_path ${work_dir}/finalPlot/MUC6_BMP6_CFTR/Diff \
--gene "GKN1_GKN2"

## GKN1 and GKN2 expression in MUC6-mutant versus wild-type samples (Figure 7i)
## Output: GKN1_GKN2.MutVsWild.IM.pdf
${Rscript} ${scripts_path}/mRNA/showGene.Normalize.MutvsWild.MUC6.GKN1_GKN2.R \
--sample_list_file ${config_path}/tumor_normal.class.list \
--sample_list_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
--rsem_file ${mRNA_path}/CombineTMM.DNAUse.NJMU_TCGA.tsv \
--maf_public_file ${work_dir}/maf_public/All_use.addVAF.maf \
--out_path ${work_dir}/finalPlot/MUC6_BMP6_CFTR/Diff \
--gene MUC6


##########################################
## CFTR, BMP6 and MTRR: overall fraction of recurrent mutations, shared vs private and
## clonal vs subclonal (Supplementary Fig12)
## Outputs, for each GENE in BMP6, CFTR, MTRR:
##   GENE.Recurrent_Ratio.pdf
##   GENE.Share_Private.pdf
##   GENE.Share_Clone.pdf
##   GENE.Driver_Con-occurence.pdf
gene_list=(BMP6 CFTR MTRR)
for gene in "${gene_list[@]}"; do
  ${Rscript} ${scripts_path}/finalPlot/BMP6_CFTR.recurrent.R \
    --gene ${gene} \
    --ccf_file ${MutationTime_path}/result/All_CCF_mutTime.addShare.tsv \
    --sample_info ${config_path}/tumor_normal.class.list \
    --out_path ${work_dir}/finalPlot/MUC6_BMP6_CFTR \
    --gene_list_file ${Images_path}/selectGCClone/GCClone_gene.all_record.report.list
done


####################################################################################
## Part 5: evolutionary timing
####################################################################################
## Estimate the progression time
## Reference implementation: https://gitlab.com/cancer-genomx/ipmn-timing
## Genomic characterization of malignant progression in neoplastic pancreatic cysts (2020, NC)
## Prepare the input files
## 1. Number of passenger (Silent) mutations private to the gastric cancer
${Rscript} ${scripts_path}/evolutionTime/prepareInput.R \
--muti_cancer ${maf_path}/All_GGA.cancer.maf \
--muti_pre ${maf_path}/All_GGA.precancer.maf \
--sample_info ${config_path}/tumor_normal.class.list \
--out_path ${work_dir}/finalPlot/evolutionTime
## 2. Infer the time for mutation rates ranging from 1 to 10 per year
${Rscript_evolutionTime} ${scripts_path}/evolutionTime/timing_summary.R \
--input_file ${work_dir}/finalPlot/evolutionTime/sample_mutNum.tsv \
--code_path ${scripts_path}/evolutionTime/ipmn-timing-master \
--plot_id_file ${config_path}/plotID.list \
--out_path ${work_dir}/finalPlot/evolutionTime

## Comparison of progression times
## 1. Time for IM to progress to IGC versus to DGC (Figure 6a, Supplementary Fig11)
##    CompareIGC_DGC.Time.pdf
## 2. Do Maintained TP53, APC, CDH1 or PIK3CA mutations shorten the time from IM to gastric
##    cancer? Stratified into all, IGC and DGC (Figure 6b)
##    CompareTrunkSMG.Time.All.pdf
##    CompareTrunkSMG.Time.IGC.pdf
##    CompareTrunkSMG.Time.DGC.pdf
## 3. 95% confidence interval of the progression time for Maintained TP53, APC, CDH1 and
##    PIK3CA mutations, stratified into IGC and DGC (Figure 6c)
##    CompareTrunkSMG.Time.DGC.95CI.pdf
##    CompareTrunkSMG.Time.IGC.95CI.pdf
# Additional strata: drinkers / non-drinkers, GS subtype and CIN subtype
type_list=(All Drink No GS CIN)
for type in "${type_list[@]}"; do
  ${Rscript} ${scripts_path}/evolutionTime/compareTime.R \
    --ccf_file ${MutationTime_path}/result/All_CCF_mutTime.addShare.tsv \
    --type ${type} \
    --gene_list ${mutsig_check_path}/maintained_smg.list  \
    --sample_info ${config_path}/tumor_normal.class.list \
    --input_file ${work_dir}/finalPlot/evolutionTime/timing_molecular_clock.tsv \
    --base_info_file ${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv \
    --out_path ${work_dir}/finalPlot/evolutionTime/${type}
done
